# https://www.bilibili.com/video/BV1W7411G7wP/?p=20&spm_id_from=pageDriver&vd_source=8bd7b24b38e3e12c558d839b352b32f4
# 20. 今天带你玩数据消消乐（定位、消除重复数据）
import pandas as pd

# De-duplicate: collapse rows that share the same 'Name' value into one.
students = pd.read_excel('../resources/input_py_tools/020/Students_Duplicates.xlsx')
print(students)  # the table as loaded, before any rows are removed
# keep='first' (the pandas default) retains the earliest row of each group of
# repeated names; pass keep='last' to retain the final occurrence instead.
students = students.drop_duplicates(subset=['Name'], keep='first')
print(students)  # the table after the repeated names have been dropped

# Flag which rows are duplicates (without removing anything).
students = pd.read_excel('../resources/input_py_tools/020/Students_Duplicates.xlsx')
# duplicated() returns a boolean Series aligned with the rows of `students`;
# with the default keep='first', every repeat of a 'Name' after its first
# occurrence is marked True.
dupe = students.duplicated(subset='Name')
print(dupe)  # the full boolean mask
print(dupe.any())  # True when at least one duplicated row exists
# Which rows are the duplicates: index the mask with itself so that only the
# True entries remain (no need to compare against True explicitly).
dupe = dupe[dupe]
print(dupe)
print(dupe.index)
# dupe.index holds row *labels*, not positions, so select with label-based
# .loc; .iloc would only be right while the index is the default 0..n-1 range.
print(students.loc[dupe.index])